cd ../src
import os as os
import pickle as pickle
import subprocess
import pandas as pd
from Reports.Figures import *
from Processing.Tests import *
from Reports.NotebookTools import *
# Show floats with 3 decimal places in rendered tables.  The fully-qualified
# option key is used because the bare 'precision' pattern can match more than
# one option on newer pandas releases, which makes set_option raise.
pd.set_option('display.precision', 3)
# Drug family -> free-text drug names (lower case) as they are spelled in the
# clinical annotation.  Misspellings are deliberate: they mirror the raw data
# and must not be "corrected" here.
drugs = {
    'TKI': [
        'perifosine', 'sunitinib', 'sorafenib', 'pazopanib', 'sutent',
        'tarceva', 'nexavaar', 'sutent (sunitinib)', 'gefitinib', 'nexavar',
        'bay-439006', 'azd', 'iressa', 'sorafenib - nexavar', 'axitinib',
        'sunitinib (sutent)', 'tipifarnib', 'tyrosine kinase inhibitor',
        'votrient', 'zd6474',
    ],
    'VEGF Ab': ['bevacizumab', 'avastin'],
    'mTORi': ['temsirolimus', 'everolimus', 'rad001', 'torisel', 'afinitor'],
    'IL2/IF': [
        'interferon', 'il-2', 'il-2 (high dose)', 'proleukin (il-2)',
        'interleukin-2', 'interferon-alpha', 'interferon alpha',
        # A comma was missing after 'proleukin', which silently fused it with
        # the next literal into 'proleukinroferon-a'.
        'intron a', 'alpha interferon', 'proleukin',
        'roferon-a', 'il-2 thearpy (interleukin)', 'high dose interleukin-2',
        'ifn-alpha (intron)', 'interleukin 2-high dose', 'inf',
    ],
    'Chemo': [
        'bortezomib', 'gemictiabine', '5-fluorouracil', 'capecitabine',
        'gemzar', 'thalidomide', 'nab-rapamycin', 'capecitabin',
        'gemcitabine', 'xeloda',
    ],
    'Vaccine': ['oncophage', 'oncophage vaccine'],
}

# Inverted lookup: individual drug name -> drug family.  The loop variable is
# named ``members`` so it does not shadow the ``drugs`` dict it iterates over.
drug_map = {
    drug: family
    for family, members in drugs.items()
    for drug in members
}
# Directory containing one sub-directory per analysis run.
result_path = '/scratch/TCGA/Firehose__2012_01_16/ucsd_analyses'

# Use the second entry of the sorted directory listing as the run to load.
run_dir = sorted(os.listdir(result_path))[1]

# Open the pickled run object inside a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
# NOTE: unpickling executes arbitrary code; only load run objects from a
# trusted location.
with open(os.path.join(result_path, run_dir, 'RunObject.p'), 'rb') as run_file:
    run = pickle.load(run_file)

# Pull the KIRC cancer object and the data layers used below.
cancer = run.load_cancer('KIRC')
clinical = cancer.load_clinical()
global_vars = cancer.load_global_vars()

mut = cancer.load_data('MAF')
mut.uncompress()

meth = cancer.load_data('Methylation')

cn = cancer.load_data('CN_broad')
cn.uncompress()

rna = cancer.load_data('mRNASeq')
rppa = cancer.load_data('RPPA')
# Bar chart of the raw stage labels, with the 'stge' misspelling patched so
# the tick labels read 'stage ...'.
raw_stage = clinical.clinical.tumor_stage
stage = raw_stage.map(lambda label: label.replace('stge', 'stage'))
stage_frequencies = stage.value_counts().sort_index()
stage_frequencies.plot(kind='bar')

# Canonical stage labels used by the rest of the analysis; raw values that are
# not keys of this mapping become missing.
stage_names = {
    'stge i': 'Stage I',
    'stge ii': 'Stage II',
    'stge iii': 'Stage III',
    'stge iv': 'Stage IV',
}
stage = clinical.clinical.tumor_stage.map(stage_names)

# Overall 5-year survival stratified by stage: fit table plus curves.
surv = clinical.survival.survival_5y
t = get_surv_fit(surv, stage)
stage.name = 'Overall Survival'  # name is set on the stage series itself
f = draw_survival_curves(
    stage,
    surv,
    colors=['green', 'blue', 'orange', 'red'],
    labels=list(stage.unique()),
    show=True,
)
fig_tab(f, t)
# Two side-by-side bar charts of tumor grade: the full label on the left and
# the label truncated to its first two characters on the right.
fig, axs = subplots(1, 2, figsize=(12, 4))
full_grade = clinical.clinical.tumor_grade
short_grade = clinical.clinical.tumor_grade.dropna().map(lambda s: s[:2])
for grade_ax, v in zip(axs, [full_grade, short_grade]):
    grade_frequencies = v.value_counts().sort_index()
    grade_frequencies.plot(kind='bar', title=v.name, ax=grade_ax)
axs[0].set_ylabel('# of Patients')
age = clinical.clinical.age

# Descriptive statistics of age within each stage, plus an 'All' column for
# the whole cohort.
by_stage = pd.DataFrame({
    label: age[stage[stage == label].index].describe()
    for label in stage.unique()
})
all_stage = pd.Series(clinical.clinical.age.describe(), name='All')
tab = by_stage.join(all_stage).astype(object)
tab

# Age distribution.
age.hist()
ylabel('# of Patients')
xlabel('Age')

# Survival against age, first on its own and then additionally passing stage;
# the second figure is paired with the summary table.
draw_survival_curves(age, surv, show=True)
fig = draw_survival_curves(age, surv, stage, show=True)
fig_tab(fig, tab)
# Gender distribution.
clinical.clinical.gender.value_counts().plot(kind='bar')
ylabel('# of Patients')
xlabel('Gender')

# Calcium level against stage.
calcium_by_stage = pd.crosstab(stage, clinical.clinical.calcium_level)
calcium_by_stage

# Calcium status among Stage IV patients.  ``stage`` holds the canonical
# 'Stage I' ... 'Stage IV' labels, so the row must be looked up as 'Stage IV';
# the lower-case 'stage iv' key is not present in the crosstab index.
s = calcium_by_stage.ix['Stage IV']
s[['low', 'normal', 'elevated']].plot(kind='bar')
ylabel('# of Patients')
xlabel('Status')

# ECOG value against stage (missing ECOG values dropped first).
pd.crosstab(stage, clinical.clinical.eastern_cancer_oncology_group.dropna())

# Hemoglobin level distribution.
clinical.clinical.hemoglobin.value_counts().plot(kind='bar')
ylabel('# of Patients')
xlabel('Hemoglobin Level')
# Map every recorded drug name to its family, then build one boolean column
# per family saying whether each first-level index entry (presumably the
# patient -- confirm against clinical.drugs) has at least one such record.
drugs_types = drugs.keys()
drug_categories = clinical.drugs.drugname.map(drug_map)
drug_given = pd.DataFrame({
    family: (drug_categories == family).groupby(level=0).sum() > 0
    for family in drugs_types
})

fig, axs = subplots(1, 2, figsize=(12, 4))

# Left panel: number of patients per stage with at least one mapped drug.
received_any = drug_given.sum(1) > 0
treated_per_stage = crosstab(stage, received_any)[True]
treated_per_stage.plot(kind='bar', ax=axs[0])
axs[0].set_ylabel('# of Patients')
axs[0].set_title('Patients Receiving Medication By Stage')

# Right panel: number of patients per drug family.
family_totals = drug_given.sum()
family_totals.plot(kind='bar', ax=axs[1])
axs[1].set_title('Drug Categories');
fig, axs = subplots(1, 2, figsize=(12, 4))

# Drug-family flags restricted to Stage IV patients (selected once and reused
# for both panels).
stage_iv_drug_given = drug_given.ix[stage.index[stage == 'Stage IV']]

# Left panel: how many Stage IV patients received each drug family.
s = stage_iv_drug_given.sum()
s.plot(kind='bar', ax=axs[0])
axs[0].set_ylabel('# of Patients')
axs[0].set_title('Drugs Given in Stage IV')

# Right panel: distribution of the number of drug families per patient (first
# five entries of value_counts).  The target axes is passed explicitly so the
# bars land on the panel that receives the title, rather than on whatever
# matplotlib currently considers the active axes.
n = stage_iv_drug_given.dropna().sum(1).value_counts()[:5]
n.plot(kind='bar', ax=axs[1])
axs[1].set_title('Drugs Given per Patient')
# Encode each patient's drug families as a string of '0'/'1' characters, one
# character per column of drug_given.
gc = drug_given.astype(int).astype(str).apply(''.join, axis=1)
gc.name = 'drugs'

# Stage IV patients who received exactly one drug family.
stage_iv_patients = stage.index[stage == 'Stage IV']
one_drug = drug_given.ix[stage_iv_patients].dropna().sum(1) == 1
single_family_patients = one_drug[one_drug].index

# Patients per family within that group, ascending.
vc = drug_given.ix[single_family_patients].sum().order()
vc.plot(kind='bar')
ylabel('# of Patients');


def split_cols(s):
    """Turn a '0'/'1' code string into a comma-joined list of column names.

    Position ``i`` of ``s`` corresponds to ``drug_given.columns[i]``; only the
    columns whose character is '1' are kept.
    """
    return ','.join([d for i, d in enumerate(drug_given.columns) if s[i] == '1'])


# Survival fit and curves by drug family for the single-family patients; the
# fit table is re-indexed with readable family names.
single_family_codes = gc[single_family_patients]
t = get_surv_fit(surv, single_family_codes)
t.index = [split_cols(code) for code in t.index]

families_in_use = [c for c in drug_given.columns if vc[c] > 0]
f = draw_survival_curves(
    single_family_codes,
    surv,
    colors=['red', 'orange', 'green', 'purple', 'blue', 'yellow'],
    labels=families_in_use[::-1],
    show=True,
    show_legend='out',
)
fig_tab(f, t)
# VHL rows from the mutation, methylation and expression matrices.  Mutation
# values 0/1 are relabelled as 'WT'/'Mutated'.
vhl_status_names = {0: 'WT', 1: 'Mutated'}
vhl_mut = mut.df.ix['VHL'].map(vhl_status_names)
vhl_meth = meth.df.ix['VHL']
vhl_rna = rna.df.ix['VHL']
for vhl_series, vhl_label in [(vhl_mut, 'VHL_mut'),
                              (vhl_meth, 'VHL_meth'),
                              (vhl_rna, 'VHL_rna')]:
    vhl_series.name = vhl_label

# Survival by VHL mutation status: once without and once with stage passed as
# the extra argument, stacked into a single output.
vhl_curve_options = dict(ann='p', show=True)
a = draw_survival_curves(vhl_mut, surv, **vhl_curve_options)
b = draw_survival_curves(vhl_mut, surv, stage, **vhl_curve_options)
stack([a, b])
# Display label and curve colour for each copy-number level (-2 .. 2).
labels = Series({
    -2: 'Homozygous Deletion',
    -1: 'Deletion',
    0: 'Normal',
    1: 'Amp',
    2: 'High Amp',
})
colors = Series({
    -2: 'black',
    -1: 'purple',
    0: 'blue',
    1: 'orange',
    2: 'red',
})

# First row under the 'Deletion' / '9p21.3' entries of the copy-number table.
cdk_del = cn.df.ix['Deletion'].ix['9p21.3'].ix[0]
cdk_del.name = 'del_band'
f = cdk_del


def _observed_level_styling():
    """Return fresh colors/labels keyword arguments for the levels present in ``f``."""
    observed = sorted(f.unique())
    return {
        'colors': colors[observed].tolist(),
        'labels': labels[observed].tolist(),
    }


# Survival by deletion level: once without and once with stage passed as the
# extra argument, stacked into a single output.
a = draw_survival_curves(f, surv, ann='p', show=True, **_observed_level_styling())
b = draw_survival_curves(f, surv, stage, ann='p', show=True, **_observed_level_styling())
stack([a, b])
# Patients labelled Stage IV.
metastatic = stage[stage == 'Stage IV'].index

# Per row of mut.df, the number of Stage IV patients with a value > 0, in
# ascending order; keep only rows hit in more than two of them.
metastatic_hits = (mut.df.ix[:, metastatic] > 0).sum(1).order()
met = metastatic_hits[metastatic_hits > 2]

# Same count over every patient, restricted to the rows kept above, and a
# three-column comparison table.
cohort_hits = (mut.df > 0).sum(1).order()
g = cohort_hits.ix[met.index]
m = pd.concat(
    [met, g - met, g],
    keys=['Metastatic', 'Non-Metastatic', 'All'],
    axis=1,
)

# Bar chart of the Stage IV counts (> 2), then a pathway plot for rows with
# more than three hits.
figsize(15, 4)
g = metastatic_hits[metastatic_hits > 2]
g.plot(kind='bar')
ylabel('# of Patients')
frequently_hit = g[g > 3].index
pathway_plot(mut.df.ix[frequently_hit, metastatic], False)
metastatic = stage[stage == 'Stage IV'].index

# --- Mutation features vs. 5-year survival, Stage IV patients only ---
survival_test = 'survival_5y'
covariates = ['age', ('mutation', 'rate_non')]


def remerge(s):
    """Flatten a tuple column label into a '__'-joined string; pass str through."""
    if type(s) == str:
        return s
    return '__'.join(s)


# Covariate table: selected columns of global_vars + clinical + deletion call.
all_covariates = global_vars.join(clinical.clinical, how='outer').join(cdk_del)
cov_df = all_covariates[covariates]
cov_df = cov_df.rename(columns=remerge)

surv = clinical.survival[survival_test]
test = SurvivalTest(surv, cov_df)
test.name = survival_test
test.check_feature = lambda s: True  # accept every feature

# Mutation features for Stage IV patients; drop patients with any missing
# value, then keep features whose row sum exceeds 6.
stage_iv_columns = stage[stage.isin(['Stage IV'])].index
df = mut.features.ix[:, stage_iv_columns].dropna(axis=1)
counts = Series(df.sum(1), name='counts')
df = df[counts > 6]

# Run the test over every feature, drop the formula column, attach the counts
# and order by the ('Full', 'LR') column.
mut_met = run_feature_matrix(df, test)
del mut_met[('Full', 'fmla')]
mut_met = mut_met.join(counts)
mut_met = mut_met.sort(columns=[('Full', 'LR')])
mut_met.head(10).astype(object)
def draw_me(f):
    """Build the stacked survival report for mutation feature ``f``.

    Three survival figures are drawn for ``mut.features.ix[f]``: one with
    ``stage`` passed as the extra argument, one over all patients, and one
    restricted to the columns of the module-level ``df`` (saved to
    'tmp.png').  If a pathway plot can be produced for ``run.gene_sets[f]`` it
    is saved to 'tmp1.png' and shown next to the restricted curves; otherwise
    the restricted curves are shown on their own.

    Relies on the module-level names ``mut``, ``surv``, ``stage``, ``df`` and
    ``run`` as they are bound at call time.
    """
    split_by_stage = draw_survival_curves(mut.features.ix[f], surv, stage,
                                          ann='p', show=True)
    all_surv = draw_survival_curves(mut.features.ix[f], surv, ann='p',
                                    show=True)
    curves = draw_survival_curves(mut.features.ix[f, df.columns], surv,
                                  ann='p', filename='tmp.png', show=True)
    try:
        # NOTE(review): a 6x4 figure size appears to have been intended for
        # the pathway plot but was never actually applied (it was assigned to
        # an unused local); call figsize(6, 4) here if that size is wanted.
        pathway_plot(mut.df.ix[run.gene_sets[f], df.columns], plt.gca())
        plt.tight_layout()
        plt.savefig('tmp1.png', dpi=75, bbox_inches=0, pad_inches=0)
        plt.close('all')
        return stack([side_by_side(['tmp.png', 'tmp1.png']),
                      split_by_stage, all_surv])
    except Exception:
        # Best-effort fallback (e.g. ``f`` has no entry in run.gene_sets):
        # report without the pathway panel.  ``Exception`` rather than a bare
        # ``except`` so that KeyboardInterrupt / SystemExit still propagate.
        return stack([curves, split_by_stage, all_surv])
# Stacked report for the first 15 rows of the mutation result table.
leading_mut_features = mut_met.index[:15]
s = stack([draw_me(f) for f in leading_mut_features])
s

# --- RPPA features vs. 5-year survival, Stage IV patients only ---
survival_test = 'survival_5y'
covariates = ['age']


def remerge(s):
    """Flatten a tuple column label into a '__'-joined string; pass str through."""
    if type(s) == str:
        return s
    return '__'.join(s)


# Covariate table: selected columns of global_vars + clinical + deletion call.
all_covariates = global_vars.join(clinical.clinical, how='outer').join(cdk_del)
cov_df = all_covariates[covariates]
cov_df = cov_df.rename(columns=remerge)

surv = clinical.survival[survival_test]
test = SurvivalTest(surv, cov_df)
test.name = survival_test
test.check_feature = lambda s: True  # accept every feature

# RPPA features for Stage IV patients, dropping patients with missing values.
stage_iv_columns = stage[stage.isin(['Stage IV'])].index
df = rppa.features.ix[:, stage_iv_columns].dropna(axis=1)

rppa_met = run_feature_matrix(df, test)
# NOTE(review): ``counts`` is not computed in this section; whatever value it
# holds from earlier in the session is joined here -- confirm this column is
# really wanted for the RPPA table.
rppa_met = rppa_met.join(counts)
rppa_met = rppa_met.sort(columns=[('Full', 'LR')])
rppa_met.head(10)
def draw_me(f):
    """Build the stacked survival report for RPPA feature ``f``.

    Draws survival curves for the feature restricted to the columns of the
    module-level ``df``, then for the full feature with ``stage`` passed as
    the extra argument, then for the full feature alone, and stacks the three
    results in that order.
    """
    def with_text_name(series):
        # The series name is converted to a plain string before plotting.
        series.name = str(series.name)
        return series

    plot_options = dict(show=True, show_legend=True, ann='p')

    restricted = with_text_name(rppa.features.ix[f, df.columns])
    curves = draw_survival_curves(restricted, surv, **plot_options)

    complete = with_text_name(rppa.features.ix[f])
    split_by_stage = draw_survival_curves(complete, surv, stage, **plot_options)
    all_surv = draw_survival_curves(complete, surv, **plot_options)

    return stack([curves, split_by_stage, all_surv])
# Stacked report for the first 10 rows of the RPPA result table.
leading_rppa_features = rppa_met.index[:10]
s = stack([draw_me(f) for f in leading_rppa_features])
s

# --- Methylation features vs. 5-year survival, Stage IV patients only ---
survival_test = 'survival_5y'
covariates = ['age', ('methylation', 'pc1')]


def remerge(s):
    """Flatten a tuple column label into a '__'-joined string; pass str through."""
    if type(s) == str:
        return s
    return '__'.join(s)


# Covariate table: selected columns of global_vars + clinical + deletion call.
all_covariates = global_vars.join(clinical.clinical, how='outer').join(cdk_del)
cov_df = all_covariates[covariates]
cov_df = cov_df.rename(columns=remerge)

surv = clinical.survival[survival_test]
test = SurvivalTest(surv, cov_df)
test.name = survival_test
test.check_feature = lambda s: True  # accept every feature

# Methylation features for Stage IV patients, dropping patients with missing
# values.
stage_iv_columns = stage[stage.isin(['Stage IV'])].index
df = meth.features.ix[:, stage_iv_columns].dropna(axis=1)

# Run the test over every feature and order by the ('Full', 'LR') column.
meth_met = run_feature_matrix(df, test)
meth_met = meth_met.sort(columns=[('Full', 'LR')])
meth_met.head(10)
def draw_me(f):
    """Build the stacked survival report for methylation feature ``f``.

    Draws survival curves for the feature restricted to the columns of the
    module-level ``df``, then for the full feature with ``stage`` passed as
    the extra argument, then for the full feature alone, and stacks the three
    results in that order.

    The pre-rendered pathway image that used to be loaded here was never added
    to the returned stack, so the load has been dropped: it only cost a file
    read and could fail when the image file was missing.
    """
    feature = meth.features.ix[f, df.columns]
    feature.name = str(feature.name)  # plotting is given a plain-string name
    curves = draw_survival_curves(feature, surv, show=True, show_legend=True,
                                  ann='p')

    feature = meth.features.ix[f]
    feature.name = str(feature.name)
    split_by_stage = draw_survival_curves(feature, surv, stage, show=True,
                                          show_legend=True, ann='p')
    all_surv = draw_survival_curves(feature, surv, show=True,
                                    show_legend=True, ann='p')
    return stack([curves, split_by_stage, all_surv])
# Stacked report for the first 10 rows of the methylation result table.
leading_meth_features = meth_met.index[:10]
s = stack([draw_me(f) for f in leading_meth_features])
s